In this analysis, we demonstrate that MitoTrace could identify the germline mutations on single-cell Smart-seq2 data which publicly available from Darmanis et al. (Darmanis et al. 2017).
Load R libraries
library(MitoTrace) library(pheatmap)
Read the BAM files, these BAM files can be downloaded from the link: (https://drive.google.com/open?id=1oGK9_RE5t2CkzFKlg24dhaKmItmgRmMt).
bams <- list.files("../Data/DarmanisEtAl/bam_file", full.names = T, pattern = ".bam$")
Read the human GRCH38 Mitochondrial reference genome
fasta_loc <- "../Data/GRCH38_MT.fa"
MitoTrace calculates alternative allele counts and read coverage for each nucleotide position
mae_res <- MitoTrace(bam_list = bams, fasta = fasta_loc)
MitoTrace calculate the allele frequencies
af <- calc_allele_frequency(mae_res) colnames(af) <- unlist(lapply(colnames(af), function(x) { a <- strsplit(x, "\\.") [[1]][1]}))
Read the relevant label files originated from the publication (Darmanis et al. 2017).
ssr_list <- read.table("../Data/DarmanisEtAl/annotation_file/SRR_Patient_all_list_sorted", sep = "\t") ok <- intersect(colnames(af), ssr_list$V1) ssr_list <- ssr_list[match(colnames(af), as.character(ssr_list$V1)),]
Read the plate relation file
plate_relation <- read.table("../Data/DarmanisEtAl/annotation_file/GBM_SRA_plate_relation", sep = "\t", header = T) plate_relation <- plate_relation[match(colnames(af), as.character(plate_relation$sra)),]
Read metadata file
meta <- read.table("../Data/DarmanisEtAl/annotation_file/GBM_metadata.csv")
Read the geo file
geo <- read.delim("../Data/DarmanisEtAl/annotation_file/FromGeoSpreadsheet.tsv", row.names = 1) geo <- geo[as.character(plate_relation$plate.well),]
Read the tSNE matrix
tsne <- read.table("../Data/DarmanisEtAl/annotation_file/GBM_TSNE.csv")
Split by allele frequency
asplit <- split(as.character(ssr_list$V1), ssr_list$V2) sample_af_means <- do.call(cbind, lapply(asplit, function(x){ subm_af <- af[,x] rowMeans(subm_af, na.rm = T) })) cutoff <- 0.75 ok <- which(apply(sample_af_means, 1, function(x) sum(x > cutoff)) > 0)
Identify informative germline mutations using AOV
pvals <- apply(af[ok,], 1, function(x) try(summary(aov(x ~ geo$V8))[[1]][1, 5])) ok <- names(which(pvals < 1e-100))
Generate the heatmap to plot the data
asplit <- split(colnames(af), geo$V8) samples_ok <- unlist(lapply(asplit, function(x) sample(x, 200))) indices_ok <- match(samples_ok, colnames(af)) tmp <- af[ok, indices_ok] anno_col <- data.frame(patient = geo$V8[indices_ok]) rownames(anno_col) <- colnames(tmp) pheatmap(tmp, show_colnames = F, annotation_col = anno_col, cluster_cols = T)
Highlight some identified germline mutation sites
par(mfrow = c(1,4)) boxplot(split(tmp["11864T>C",], anno_col$patient == "BT_S1"), main = "11864T>C", xlab = "BT_S1", ylab = "Alternative allele frequency") boxplot(split(tmp["4924G>A",], anno_col$patient == "BT_S2"), main = "4924G>A", xlab = "BT_S2", ylab = "Alternative allele frequency") boxplot(split(tmp["4790A>G",], anno_col$patient == "BT_S4"), main = "4790A>G", xlab = "BT_S4", ylab = "Alternative allele frequency") boxplot(split(tmp["709G>A",], anno_col$patient == "BT_S6"), main = "709G>A", xlab = "BT_S6", ylab = "Alternative allele frequency") par(mfrow = c(1,1))
tSNE visualization of the cell-cell distance matrix by mitochondrial heteroplasmy profiles separated 4 patients.
vars <- apply(af, 1, var) ok <- names(tail(sort(vars), 500)) tmp <- data.matrix(af[ok,]) tmp <- tmp[,which(apply(tmp, 2, var) > 0)] correl <- cor(tmp) tData <- Rtsne::Rtsne(1 - abs(correl), perplexity = 20) plot(tData$Y, col = as.character(meta$Sample.name.color), main = "Patient", pch =20)
tSNE visualization of the cell-cell distance matrix by cell type identify
tsne <- tsne[rownames(meta),] plot(tsne, col = as.character(meta$Sample.name.color), pch = 16, xlab = "tSNE1", ylab = "tSNE2")
tSNE visualization of the cell cluster by cell type
plot(tsne, col = as.character(meta$Cluster_2d_color), pch = 16, xlab = "tSNE1", ylab = "tSNE2")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.